-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[AMDGPU][NewPM] Port "GCNRewritePartialRegUses" pass to NPM #126024
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-amdgpu Author: Vikram Hegde (vikramRH) Changes: Full diff: https://github.com/llvm/llvm-project/pull/126024.diff 8 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 2c6b8828d5cfbab..048c39464be5418 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -460,7 +460,7 @@ extern char &GCNPreRAOptimizationsID;
FunctionPass *createAMDGPUSetWavePriorityPass();
void initializeAMDGPUSetWavePriorityPass(PassRegistry &);
-void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &);
+void initializeGCNRewritePartialRegUsesLegacyPass(llvm::PassRegistry &);
extern char &GCNRewritePartialRegUsesID;
void initializeAMDGPUWaitSGPRHazardsLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 41ad1445f47e927..62716a9d725d90d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -98,6 +98,7 @@ FUNCTION_PASS_WITH_PARAMS(
#endif
MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
MACHINE_FUNCTION_PASS("amdgpu-pre-ra-long-branch-reg", GCNPreRALongBranchRegPass())
+MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
MACHINE_FUNCTION_PASS("si-fix-vgpr-copies", SIFixVGPRCopiesPass())
@@ -119,7 +120,6 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-insert-delay-alu", AMDGPUInsertDelayAluPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
-DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass())
DUMMY_MACHINE_FUNCTION_PASS("si-form-memory-clauses", SIFormMemoryClausesPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index fffd30b26dc1d50..dedfcfcb4d5b4f2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -33,6 +33,7 @@
#include "GCNDPPCombine.h"
#include "GCNIterativeScheduler.h"
#include "GCNPreRALongBranchReg.h"
+#include "GCNRewritePartialRegUses.h"
#include "GCNSchedStrategy.h"
#include "GCNVOPDUtils.h"
#include "R600.h"
@@ -550,7 +551,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeGCNNSAReassignPass(*PR);
initializeGCNPreRAOptimizationsPass(*PR);
initializeGCNPreRALongBranchRegLegacyPass(*PR);
- initializeGCNRewritePartialRegUsesPass(*PR);
+ initializeGCNRewritePartialRegUsesLegacyPass(*PR);
initializeGCNRegPressurePrinterPass(*PR);
initializeAMDGPUPreloadKernArgPrologLegacyPass(*PR);
initializeAMDGPUWaitSGPRHazardsLegacyPass(*PR);
diff --git a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
index 077ccf36ea4fb28..cccb71d5f8e7287 100644
--- a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
@@ -28,6 +28,7 @@
/// calculation and creates more possibilities for the code unaware of lanemasks
//===----------------------------------------------------------------------===//
+#include "GCNRewritePartialRegUses.h"
#include "AMDGPU.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
@@ -44,25 +45,7 @@ using namespace llvm;
namespace {
-class GCNRewritePartialRegUses : public MachineFunctionPass {
-public:
- static char ID;
- GCNRewritePartialRegUses() : MachineFunctionPass(ID) {}
-
- StringRef getPassName() const override {
- return "Rewrite Partial Register Uses";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addPreserved<LiveIntervalsWrapperPass>();
- AU.addPreserved<SlotIndexesWrapperPass>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
-private:
+class GCNRewritePartialRegUsesImpl {
MachineRegisterInfo *MRI;
const SIRegisterInfo *TRI;
const TargetInstrInfo *TII;
@@ -155,13 +138,36 @@ class GCNRewritePartialRegUses : public MachineFunctionPass {
/// Cache for getAllocatableAndAlignedRegClassMask method:
/// AlignNumBits -> Class bitmask.
mutable SmallDenseMap<unsigned, BitVector> AllocatableAndAlignedRegClassMasks;
+
+public:
+ GCNRewritePartialRegUsesImpl(LiveIntervals *LS) : LIS(LS) {}
+ bool run(MachineFunction &MF);
+};
+
+class GCNRewritePartialRegUsesLegacy : public MachineFunctionPass {
+public:
+ static char ID;
+ GCNRewritePartialRegUsesLegacy() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override {
+ return "Rewrite Partial Register Uses";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addPreserved<LiveIntervalsWrapperPass>();
+ AU.addPreserved<SlotIndexesWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
};
} // end anonymous namespace
// TODO: move this to the tablegen and use binary search by Offset.
-unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset,
- unsigned Size) const {
+unsigned GCNRewritePartialRegUsesImpl::getSubReg(unsigned Offset,
+ unsigned Size) const {
const auto [I, Inserted] = SubRegs.try_emplace({Offset, Size}, 0);
if (Inserted) {
for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) {
@@ -175,15 +181,14 @@ unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset,
return I->second;
}
-unsigned GCNRewritePartialRegUses::shiftSubReg(unsigned SubReg,
- unsigned RShift) const {
+unsigned GCNRewritePartialRegUsesImpl::shiftSubReg(unsigned SubReg,
+ unsigned RShift) const {
unsigned Offset = TRI->getSubRegIdxOffset(SubReg) - RShift;
return getSubReg(Offset, TRI->getSubRegIdxSize(SubReg));
}
-const uint32_t *
-GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC,
- unsigned SubRegIdx) const {
+const uint32_t *GCNRewritePartialRegUsesImpl::getSuperRegClassMask(
+ const TargetRegisterClass *RC, unsigned SubRegIdx) const {
const auto [I, Inserted] =
SuperRegMasks.try_emplace({RC, SubRegIdx}, nullptr);
if (Inserted) {
@@ -197,7 +202,8 @@ GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC,
return I->second;
}
-const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask(
+const BitVector &
+GCNRewritePartialRegUsesImpl::getAllocatableAndAlignedRegClassMask(
unsigned AlignNumBits) const {
const auto [I, Inserted] =
AllocatableAndAlignedRegClassMasks.try_emplace(AlignNumBits);
@@ -214,7 +220,7 @@ const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask(
}
const TargetRegisterClass *
-GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
+GCNRewritePartialRegUsesImpl::getRegClassWithShiftedSubregs(
const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits,
unsigned CoverSubregIdx, SubRegMap &SubRegs) const {
@@ -289,8 +295,8 @@ GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
}
const TargetRegisterClass *
-GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,
- SubRegMap &SubRegs) const {
+GCNRewritePartialRegUsesImpl::getMinSizeReg(const TargetRegisterClass *RC,
+ SubRegMap &SubRegs) const {
unsigned CoverSubreg = AMDGPU::NoSubRegister;
unsigned Offset = std::numeric_limits<unsigned>::max();
unsigned End = 0;
@@ -343,9 +349,8 @@ GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,
// Only the subrange's lanemasks of the original interval need to be modified.
// Subrange for a covering subreg becomes the main range.
-void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg,
- Register NewReg,
- SubRegMap &SubRegs) const {
+void GCNRewritePartialRegUsesImpl::updateLiveIntervals(
+ Register OldReg, Register NewReg, SubRegMap &SubRegs) const {
if (!LIS->hasInterval(OldReg))
return;
@@ -400,13 +405,13 @@ void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg,
}
const TargetRegisterClass *
-GCNRewritePartialRegUses::getOperandRegClass(MachineOperand &MO) const {
+GCNRewritePartialRegUsesImpl::getOperandRegClass(MachineOperand &MO) const {
MachineInstr *MI = MO.getParent();
return TII->getRegClass(TII->get(MI->getOpcode()), MI->getOperandNo(&MO), TRI,
*MI->getParent()->getParent());
}
-bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
+bool GCNRewritePartialRegUsesImpl::rewriteReg(Register Reg) const {
auto Range = MRI->reg_nodbg_operands(Reg);
if (Range.empty() || any_of(Range, [](MachineOperand &MO) {
return MO.getSubReg() == AMDGPU::NoSubRegister; // Whole reg used. [1]
@@ -476,12 +481,10 @@ bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
return true;
}
-bool GCNRewritePartialRegUses::runOnMachineFunction(MachineFunction &MF) {
+bool GCNRewritePartialRegUsesImpl::run(MachineFunction &MF) {
MRI = &MF.getRegInfo();
TRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo());
TII = MF.getSubtarget().getInstrInfo();
- auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
- LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
bool Changed = false;
for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
Changed |= rewriteReg(Register::index2VirtReg(I));
@@ -489,11 +492,32 @@ bool GCNRewritePartialRegUses::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
-char GCNRewritePartialRegUses::ID;
+bool GCNRewritePartialRegUsesLegacy::runOnMachineFunction(MachineFunction &MF) { // Legacy pass manager entry point; delegates the work to GCNRewritePartialRegUsesImpl.
+ auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>(); // LiveIntervals is optional here: the wrapper pointer may be null.
+ auto LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr; // Pass nullptr on when no LiveIntervals wrapper is available.
+ GCNRewritePartialRegUsesImpl Impl(LIS); // A fresh Impl (with its own caches) is built for each function.
+ return Impl.run(MF); // Returns true when at least one register was rewritten.
+}
+
+PreservedAnalyses
+GCNRewritePartialRegUsesPass::run(MachineFunction &MF, // New pass manager entry point; delegates the work to GCNRewritePartialRegUsesImpl.
+ MachineFunctionAnalysisManager &MFAM) {
+ auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF); // Only consults an already cached LiveIntervals result; presumably null when none is cached.
+ if (!GCNRewritePartialRegUsesImpl(LIS).run(MF)) // run() reports whether any register was rewritten.
+ return PreservedAnalyses::all(); // Nothing changed, so every analysis stays valid.
+
+ auto PA = getMachineFunctionPassPreservedAnalyses();
+ PA.preserveSet<CFGAnalyses>(); // Same preservation set as the legacy getAnalysisUsage: CFG, LiveIntervals, SlotIndexes.
+ PA.preserve<LiveIntervalsAnalysis>();
+ PA.preserve<SlotIndexesAnalysis>();
+ return PA;
+}
+
+char GCNRewritePartialRegUsesLegacy::ID;
-char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUses::ID;
+char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUsesLegacy::ID;
-INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUses, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUsesLegacy, DEBUG_TYPE,
"Rewrite Partial Register Uses", false, false)
-INITIALIZE_PASS_END(GCNRewritePartialRegUses, DEBUG_TYPE,
+INITIALIZE_PASS_END(GCNRewritePartialRegUsesLegacy, DEBUG_TYPE,
"Rewrite Partial Register Uses", false, false)
diff --git a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.h b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.h
new file mode 100644
index 000000000000000..b2c3190b5c6ba0f
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.h
@@ -0,0 +1,23 @@
+//===- GCNRewritePartialRegUses.h -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_GCNREWRITEPARTIALREGUSES_H
+#define LLVM_LIB_TARGET_AMDGPU_GCNREWRITEPARTIALREGUSES_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+class GCNRewritePartialRegUsesPass // New pass manager wrapper for the "Rewrite Partial Register Uses" pass.
+ : public PassInfoMixin<GCNRewritePartialRegUsesPass> {
+public:
+ PreservedAnalyses run(MachineFunction &MF, // Defined in GCNRewritePartialRegUses.cpp; returns the analyses left valid after the run.
+ MachineFunctionAnalysisManager &MFAM);
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_GCNREWRITEPARTIALREGUSES_H
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir
index 85d0c054754d03d..ede043ce73a47fb 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
# RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs -stop-after=rewrite-partial-reg-uses %s -o - | FileCheck -check-prefix=CHECK %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes="rename-independent-subregs,amdgpu-rewrite-partial-reg-uses" %s -o - | FileCheck -check-prefix=CHECK %s
--- |
define void @test_vreg_96_w64() !dbg !5 {
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir
index 037f39df8c3e06e..79e9ce27376950a 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs -stop-after=rewrite-partial-reg-uses %s -o - | FileCheck -check-prefix=CHECK %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes="rename-independent-subregs,amdgpu-rewrite-partial-reg-uses" %s -o - | FileCheck -check-prefix=CHECK %s
---
name: test_subregs_composition_vreg_1024
tracksRegLiveness: true
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir
index 07e49dcdafd8cc3..33007ee8a7c38d6 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs -stop-after=rewrite-partial-reg-uses %s -o - | FileCheck -check-prefix=CHECK %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes="rename-independent-subregs,amdgpu-rewrite-partial-reg-uses" %s -o - | FileCheck -check-prefix=CHECK %s
---
name: test_subregs_composition_vreg_1024
tracksRegLiveness: true
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
| #include "GCNIterativeScheduler.h" | ||
| #include "GCNPreRALongBranchReg.h" | ||
| #include "GCNPreRAOptimizations.h" | ||
| #include "GCNRewritePartialRegUses.h" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This seems to be unused
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Required by the includes of AMDGPUPassRegistry.def
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh yes
No description provided.